# Start from an empty workspace: drop every visible (non dot-prefixed) object
# from the current environment before the analysis runs.
rm(list = ls(all.names = FALSE))

# load packages
library(ggplot2)

# Read the input table. The rest of the script uses its keyword-count,
# crime-count, jurisdiction, union and year columns.
dat <- read.csv(file = "police_metadata.csv")

# Keyword log ratio per row: log(right) - log(left). 0.5 is added to both
# counts so that a count of zero still has a finite logarithm.
dat[["ratio_log"]] <- with(
  dat,
  log(count_right_kw + 0.5) - log(count_left_kw + 0.5)
)

# Crime log ratios, built the same way from the right/left crime counts.
# A missing count on either side leaves the ratio NA.
dat[["pmk_ratio_log"]] <- with(
  dat,
  log(pmk_right + 0.5) - log(pmk_left + 0.5)
)
dat[["extreme_crime_ratio_log"]] <- with(
  dat,
  log(extreme_crime_right + 0.5) - log(extreme_crime_left + 0.5)
)

# Impute missing crime ratios with the federal-level ("bund") ratio of the
# same year.

# Federal rows only. `%in%` is used instead of `==` so that a missing
# jurisdiction evaluates to FALSE rather than NA; indexing with NA would add
# an all-NA row to the subset.
bund_df <- dat[
  dat$jurisdiction %in% "bund",
  c("year", "pmk_right", "pmk_left", "extreme_crime_right", "extreme_crime_left")
]

# Same log-ratio definition as for the row-level crime ratios.
bund_df$pmk_ratio_log_fed <-
  log(bund_df$pmk_right + 0.5) - log(bund_df$pmk_left + 0.5)
bund_df$extreme_crime_ratio_log_fed <-
  log(bund_df$extreme_crime_right + 0.5) - log(bund_df$extreme_crime_left + 0.5)

# Keep the first federal row per year so the merge below cannot multiply rows
# of `dat`, then reduce to the join key plus the two federal ratios.
bund_df <- bund_df[!duplicated(bund_df$year), ]
bund_df <- bund_df[, c("year", "pmk_ratio_log_fed", "extreme_crime_ratio_log_fed")]

# Left join: every row of `dat` is kept; years without a federal row get NA.
# Note that merge() returns the rows sorted by `year`.
dat <- merge(dat, bund_df, by = "year", all.x = TRUE)

# Use the row's own ratio where available, otherwise the federal ratio.
dat$pmk_ratio_log_imp <- ifelse(
  is.na(dat$pmk_ratio_log),
  dat$pmk_ratio_log_fed,
  dat$pmk_ratio_log
)
dat$extreme_crime_ratio_log_imp <- ifelse(
  is.na(dat$extreme_crime_ratio_log),
  dat$extreme_crime_ratio_log_fed,
  dat$extreme_crime_ratio_log
)

# Bias measures: keyword log ratio minus a crime log ratio. The `_imp`
# variants subtract the federally imputed crime ratio instead of the raw one.
dat[["bias_ratio_pmk"]] <- with(dat, ratio_log - pmk_ratio_log)
dat[["bias_ratio_pmk_imp"]] <- with(dat, ratio_log - pmk_ratio_log_imp)
dat[["bias_ratio_extreme"]] <- with(dat, ratio_log - extreme_crime_ratio_log)
dat[["bias_ratio_extreme_imp"]] <- with(
  dat,
  ratio_log - extreme_crime_ratio_log_imp
)

# Mean imputed extreme-crime bias for each year/union combination. The
# formula interface of aggregate() drops rows with missing values by default.
bias_agg <- aggregate(
  bias_ratio_extreme_imp ~ year + union,
  data = dat,
  FUN = mean
)

# Fix the level order (gdp first, dpolg second); the manual plot scales rely
# on this order. Any other union value becomes NA.
bias_agg$union <- factor(bias_agg$union, levels = c("gdp", "dpolg"))

# Plot the mean bias measure over time, one line per police union.
# The plot is stored in a variable so that it can be handed to ggsave()
# explicitly instead of relying on the implicit "last plot" global state.
fig_e1 <- ggplot(
  data = bias_agg,
  aes(
    x = year,
    y = bias_ratio_extreme_imp,
    color = factor(union),
    fill = factor(union),
    shape = factor(union)
  )
) +
  geom_point(size = 2.5) +
  #  stat_smooth() +
  geom_line() +
  # dashed reference line at zero bias
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_classic() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # The three scales share name and labels so ggplot2 merges them into a
  # single legend. Values and labels follow the factor level order of `union`.
  scale_color_manual(
    values = c("firebrick", "dodgerblue"),
    name = "Police Union",
    labels = c("GdP", "DPolG")
  ) +
  scale_fill_manual(
    values = c("firebrick", "dodgerblue"),
    name = "Police Union",
    labels = c("GdP", "DPolG")
  ) +
  scale_shape_manual(
    values = c(16, 17),
    name = "Police Union",
    labels = c("GdP", "DPolG")
  ) +
  labs(x = "", y = "Bias") +
  scale_x_continuous(breaks = seq(2000, 2020, 2))

# Evaluate at top level so the figure is still displayed wherever the
# original expression would have been auto-printed.
fig_e1

# Write the figure to disk (7 x 5 inches, ggsave's default unit).
ggsave("FigE1.pdf", plot = fig_e1, width = 7, height = 5)
